This notebook inserts sound files into the pumilio database, assuming all files are already organized inside the pumilio file structure (.../pumilio/sounds/sounds/'ColID'/'SiteID'/).
sound_directory – directory containing organized sound files to be added to the database
sound_db_filepath – path to output csv file containing sound records ready to import into database
In [8]:
sound_directory = ""
In [ ]:
sound_db_filename = ""
In [7]:
import pandas
import re
from datetime import datetime
import os.path
from os import listdir
generate record of sound for database
In [10]:
def create_db_sound_record(sound, count):
SoundID = count + 100
SoundStatus = 0
QualityFlagID = 0
OtherSoundID = 0
DerviedSound = ''
DerviedFromSoundID = ''
ColID = sound['ColID']
DirID = sound['ID']
SoundName = sound['filename'].rstrip('.flac')
OriginalFilename = sound['filename']
FileSize = os.path.getsize(os.path.join(sound_directory, ColID, DirID, OriginalFilename))
MD5_hash = ''
AudioPreviewFilename = ''
AudioPreviewFormat = ''
dt = datetime.strptime(SoundName, '%y%m%d-%H%M%S')
Date = dt.strftime('%Y-%m-%d')
Time = dt.strftime('%H:%M:%S')
SamplingRate = 48000
BitRate = 16
Channels = 2
Duration = 300
SoundFormat = 'flac'
SiteID = int(sound['ID']) + 100
SensorID = 103
Notes = ''
SoundStats = 0
stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
sound_table = pandas.DataFrame([[
SoundID,
SoundStatus,
QualityFlagID,
OtherSoundID,
DerviedSound,
DerviedFromSoundID,
ColID,
DirID,
SoundName,
OriginalFilename,
FileSize,
MD5_hash,
AudioPreviewFilename,
AudioPreviewFormat,
Date,
Time,
SamplingRate,
BitRate,
Channels,
Duration,
SoundFormat,
SiteID,
SensorID,
Notes,
SoundStats,
stamp
]], columns=['SoundID', 'SoundStatus', 'QualityFlagID', 'OtherSoundID', 'DerivedSound', 'DerivedFromSoundID', 'ColID', 'DirID', 'SoundName', 'OriginalFilename', 'FileSize', 'MD5_hash', 'AudioPreviewFilename', 'AudioPreviewFormat', 'Date', 'Time', 'SamplingRate', 'BitRate', 'Channels', 'Duration', 'SoundFormat', 'SiteID', 'SensorID', 'Notes', 'SoundStats', 'stamp'])
#print(sound_table)
global sounds_table
sounds_table = sounds_table.append(sound_table, ignore_index=True)
In [9]:
collections = listdir(sound_directory)
ColIDs = []
IDs = []
filenames = []
for collection in collections:
m = re.search('\d', collection)
if m:
ColID = m.string
sites = listdir(os.path.join(sound_directory, ColID))
for site in sites:
m = re.search('\d', site)
if m:
ID = m.string
sound_files = listdir(os.path.join(sound_directory, ColID, ID))
for sound_file in sound_files:
m = re.search('\d{6}-\d{6}\.flac', sound_file)
if m:
filename = m.string
filenames.append(filename)
ColIDs.append(ColID)
IDs.append(ID)
sounds = pandas.DataFrame({'filename': filenames,
'ColID': ColIDs,
'ID': IDs})
sounds = sounds.sort_values(by='filename')
In [11]:
sounds_table = pandas.DataFrame()
count = 1
for index, sound in sounds.iterrows():
create_db_sound_record(sound, count)
count = count + 1
sounds_table.to_csv(sound_db_filepath)